/*==============================================================================
FILE NAME: Figure_D2.do
CREATED: 6 July 2025
==============================================================================*/
// Start from an empty session and never pause output at the -more- prompt
clear all
set more off

/* Stand-alone use: uncomment this block, fill in your user name and root
   path, and the path globals used throughout this file will be defined.
if c(username)=="" { //insert username
	global rootdir "" // insert root path
	// Define global paths for replication package
	global processed_data "$rootdir/processed_data" 
	global raw_data "$rootdir/raw_data"
	global figures "$rootdir/output/figures"
} 
*/

// -----------------------------------------------------------------------------
// Build the ZIP-code demographics file used by every panel below
// -----------------------------------------------------------------------------

// Read the raw census text extract through its Stata dictionary (.dct)
infile using "$raw_data/Zipcode Decennial Census Demographics/R13696237.dct", using("$raw_data/Zipcode Decennial Census Demographics/R13696237_SL860.txt")

// Geographic identifier fields that are never referenced later in this file
drop SUBMCD SDELM SDSEC SDUNI UR PCI TAZ UGA PUMA5 PUMA1 GEOID BTTR BTBG ///
	FIPS NAME QName STUSAB SUMLEV GEOCOMP FILEID LOGRECNO US REGION DIVISION ///
	STATECE STATE COUNTY COUSUB PLACE PLACESE TRACT BLKGRP CONCIT AIANHH ///
	AIANHHFP AIHHTLI AITSCE AITS ANRC CBSA CSA METDIV MACC MEMI NECTA CNECTA ///
	NECTADIV UA UACP CDCURR SLDU SLDL VTD ZCTA3

// ZCTA5 becomes the numeric merge key; non-numeric codes turn into missing
rename ZCTA5 ZipCode
destring ZipCode, replace force

// Keep only ZIP codes 73301, 75001-79999 and 88510-88589
// (presumably the Texas ranges -- confirm). Missing codes are dropped as well.
keep if ZipCode == 73301 | inrange(ZipCode, 75001, 79999) | inrange(ZipCode, 88510, 88589)

// Census cells that are used as-is, under readable names
rename A00002_001 tot_pop
rename A00002_002 pop_density
rename A12001_004 some_college_plus
rename A09003_001 avg_commute_time
rename A14006_001 median_hh_income
rename A14024_001 income_pc
rename A10057_001 median_year_built
rename A10003_001 avg_hh_size
rename A10036_001 median_hh_value
rename A18009_001 median_rent

* no urban_share is constructed

// Shares: each numerator cell divided by its table's _001 total cell
gen hisp_share = A04001_010/A04001_001
gen white_share = A04001_003/A04001_001
gen black_share = A04001_004/A04001_001
// "other" pools cells _005 through _009; missing cells count as zero in the row sum
egen other = rowtotal(A04001_005 A04001_006 A04001_007 A04001_008 A04001_009)
gen other_share = other/A04001_001

gen college_share = some_college_plus/A12001_001

gen LFPR = A17002_002/A17002_001

gen unemployment_rate = A17005_003/A17005_001

gen child_poverty_rate = A13003A_002/A13003A_001
gen adult_poverty_rate = A13003B_002/A13003B_001

// Drop every remaining raw census cell and the land/water area fields
drop A0* A1* AREALAND AREAWATR

save "$processed_data/zipcode_demographics_2012.dta", replace // ZIP-code demographics for 2012 census info

// Panel A: Socio-Demographic and Economic Predictors of Citizen Complaints (2008-2012) for Air
use "$processed_data/unique_complaints.dta", clear

// Complaint years 2003-2019 only (rows with a missing year are dropped too)
keep if inrange(year, 2003, 2019)

rename County_CIN County
rename ZipCode_CIN ZipCode
gen complaint = Number_Of_Complaints

// Air incidents only: the flag must be non-zero and not system-missing
keep if incident_air != 0 & incident_air != .

// County border indicators; complaint rows without a county match are kept
merge m:1 County using "$processed_data/Border_Counties.dta", keep(master match) nogenerate

// Counties absent from the border file count as non-border
foreach b in border_exact border_100km {
	replace `b' = 0 if `b' == .
}

// Total complaints per ZIP-year, then the average over the years in which the ZIP appears
collapse (sum) complaint (mean) border_exact border_100km , by(ZipCode year)
collapse (mean) complaint border_exact border_100km, by(ZipCode)

// Turn the averaged border shares back into 0/1 indicators (cut-off 0.5)
foreach b in border_100km border_exact {
	replace `b' = (`b' >= 0.5)
}

// ZIP demographics: keep matches and census-only ZIPs, drop ZIPs not in the census file
merge 1:1 ZipCode using "$processed_data/zipcode_demographics_2012.dta", keep(match using) nogenerate

// Remaining ZIP-level inputs: never add ZIPs that exist only in the using file
foreach f in Emissions_events_count facility_count mean_PM25 {
	merge 1:1 ZipCode using "$processed_data/`f'.dta", keep(master match) nogenerate
}

// No record in a count file means a count of zero
foreach v in EmissionsEvent complaint count_RN {
	replace `v' = 0 if `v' == .
}

// Zeros in these census fields are treated as missing.
// median_hh_inc is a variable abbreviation of median_hh_income; the short
// form is kept because it names std_median_hh_inc and the stored estimates.
foreach v in tot_pop pop_density avg_hh_size median_hh_inc median_year_built median_hh_value median_rent {
	replace `v' = . if `v' == 0
}

// Complete cases only.
// NOTE(review): border_100km is zero-filled before the demographics merge, so
// census-only ZIPs still have it missing here and appear to be dropped even
// though their complaint count was just set to 0 -- confirm this is intended.
local required mean_PM25 EmissionsEvent border_100km count_RN tot_pop pop_density avg_hh_size median_hh_inc median_year_built median_hh_value median_rent avg_commute_time hisp_share white_share black_share other_share college_share LFPR unemployment_rate child_poverty_rate adult_poverty_rate
foreach v of local required {
	drop if `v' == .
}

gen median_housing_age = 2012 - median_year_built

// Standardize the same list, with housing age taking the place of year built
local to_std : subinstr local required "median_year_built" "median_housing_age"
foreach v of local to_std {
	egen std_`v' = std(`v')
}

// Variable labels for the standardized regressors
label var std_count_RN "Number of Plants"
label var std_EmissionsEvent "Average # of Emissions Events (2003-2016)"
label var std_mean_PM25 "Average PM2.5 Concentration (2000-2018)"
label var std_border_100km "Border County"
label var std_tot_pop "Total Population (2008-2012)"
label var std_white_share "White Share (2008-2012)"
label var std_black_share "Black Share (2008-2012)"
label var std_other_share "Other Share (2008-2012)"
label var std_hisp_share "Hispanic Share (2008-2012)"
label var std_pop_density "Population Density (2008-2012)"
label var std_median_hh_inc "Median Household Income (2008-2012)"
label var std_median_housing_age "Median Housing Age (2008-2012)"
label var std_college_share "Some College Share (2008-2012)"
label var std_LFPR "Laborforce Participation Rate (2008-2012)"
label var std_unemployment_rate "Unemployment Rate (2008-2012)"
label var std_child_poverty_rate "Child Poverty Rate (2008-2012)"
label var std_adult_poverty_rate "Adult Poverty Rate (2008-2012)"

// Complaints per 1,000 residents
gen complaints_pc = complaint/tot_pop
replace complaints_pc = 1000*complaints_pc

// Bivariate regressions in levels. Each result is stored under the predictor's
// name, so the log-outcome loop further down overwrites all of them except
// unemployment_rate (which is not plotted).
foreach v in count_RN mean_PM25 border_100km pop_density median_hh_inc median_housing_age hisp_share white_share black_share other_share college_share LFPR unemployment_rate child_poverty_rate adult_poverty_rate {
	regress complaints_pc std_`v', vce(robust)
	estimates store `v'
}

// Inverse hyperbolic sine of the outcome (x100); not used by the regressions below
gen complaint_pc_IHS = log(complaints_pc + sqrt(1+complaints_pc^2))
replace complaint_pc_IHS = 100*complaint_pc_IHS

// Log outcome: observations with complaints_pc == 0 get a missing log and
// therefore drop out of the regressions
gen logcomplaint_pc = log(complaints_pc)

// Bivariate regressions with the log outcome; these are the plotted estimates
foreach v in count_RN mean_PM25 border_100km pop_density median_hh_inc median_housing_age hisp_share white_share black_share other_share college_share LFPR child_poverty_rate adult_poverty_rate {
	regress logcomplaint_pc std_`v', vce(robust)
	estimates store `v'
}

graph set window fontface "Times New Roman"

// Coefficient plot; transform() converts log points to percent changes.
// Option order is kept exactly as written because xlabel(), legend() and
// graphregion() are each given more than once.
local plotted count_RN border_100km pop_density median_hh_inc median_housing_age hisp_share white_share black_share other_share college_share LFPR child_poverty_rate adult_poverty_rate mean_PM25
coefplot `plotted' , ///
	transform(* = 100*(exp(@)-1)) xline(0) pstyle(p1) ciopts(recast(rcap)) ///
	graphregion(fcolor(white)) drop(_cons) offset(0) ///
	mcolor(navy) msize(medsmall) mfcolor(navy) ///
	ylabel(,labsize(large)) xlabel(-20(10)20, nogrid labsize(medsmall)) ///
	xtitle("Percent Change in the Number of Air" "Complaints per 1,000 people", size(large)) ///
	legend(off) ytitle("Standard Deviation Increase in...", size(large)) ///
	graphregion(fcolor(255 255 255)) graphregion(lcolor(255 255 255)) legend(off) ///
	xlabel(-100(20)100, angle(45) labsize(large)) name(rankchange_bv, replace) xsize(8.6)
graph export "$figures/Figure_D2_Panel_A.pdf", replace


// Panel B: Socio-Demographic and Economic Predictors of Citizen Complaints (2008-2012) for Water
use "$processed_data/unique_complaints.dta", clear

// Complaint years 2003-2019 only (rows with a missing year are dropped too)
keep if inrange(year, 2003, 2019)

rename County_CIN County
rename ZipCode_CIN ZipCode
gen complaint = Number_Of_Complaints

// Water incidents only: the flag must be non-zero and not system-missing
keep if incident_water != 0 & incident_water != .

// County border indicators; complaint rows without a county match are kept
merge m:1 County using "$processed_data/Border_Counties.dta", keep(master match) nogenerate

// Counties absent from the border file count as non-border
foreach b in border_exact border_100km {
	replace `b' = 0 if `b' == .
}

// Total complaints per ZIP-year, then the average over the years in which the ZIP appears
collapse (sum) complaint (mean) border_exact border_100km , by(ZipCode year)
collapse (mean) complaint border_exact border_100km, by(ZipCode)

// Turn the averaged border shares back into 0/1 indicators (cut-off 0.5)
foreach b in border_100km border_exact {
	replace `b' = (`b' >= 0.5)
}

// ZIP demographics: keep matches and census-only ZIPs, drop ZIPs not in the census file
merge 1:1 ZipCode using "$processed_data/zipcode_demographics_2012.dta", keep(match using) nogenerate

// Remaining ZIP-level inputs: never add ZIPs that exist only in the using file
foreach f in Emissions_events_count facility_count mean_PM25 {
	merge 1:1 ZipCode using "$processed_data/`f'.dta", keep(master match) nogenerate
}

// No record in a count file means a count of zero
foreach v in EmissionsEvent complaint count_RN {
	replace `v' = 0 if `v' == .
}

// Zeros in these census fields are treated as missing.
// median_hh_inc is a variable abbreviation of median_hh_income; the short
// form is kept because it names std_median_hh_inc and the stored estimates.
foreach v in tot_pop pop_density avg_hh_size median_hh_inc median_year_built median_hh_value median_rent {
	replace `v' = . if `v' == 0
}

// Complete cases only.
// NOTE(review): border_100km is zero-filled before the demographics merge, so
// census-only ZIPs still have it missing here and appear to be dropped even
// though their complaint count was just set to 0 -- confirm this is intended.
local required mean_PM25 EmissionsEvent border_100km count_RN tot_pop pop_density avg_hh_size median_hh_inc median_year_built median_hh_value median_rent avg_commute_time hisp_share white_share black_share other_share college_share LFPR unemployment_rate child_poverty_rate adult_poverty_rate
foreach v of local required {
	drop if `v' == .
}

// Housing age relative to 2012, the same reference year Panel A uses and the
// vintage named in the labels and the input file (this block previously used
// 2000). The variable is only used after standardization, so the shift in
// reference year leaves std_median_housing_age and the plot unchanged.
gen median_housing_age = 2012 - median_year_built

// Standardize the same list, with housing age taking the place of year built
local to_std : subinstr local required "median_year_built" "median_housing_age"
foreach v of local to_std {
	egen std_`v' = std(`v')
}

// Variable labels for the standardized regressors
label var std_count_RN "Number of Plants"
label var std_EmissionsEvent "Average # of Emissions Events (2003-2016)"
label var std_mean_PM25 "Average PM2.5 Concentration (2000-2018)"
label var std_border_100km "Border County"
label var std_tot_pop "Total Population (2008-2012)"
label var std_white_share "White Share (2008-2012)"
label var std_black_share "Black Share (2008-2012)"
label var std_other_share "Other Share (2008-2012)"
label var std_hisp_share "Hispanic Share (2008-2012)"
label var std_pop_density "Population Density (2008-2012)"
label var std_avg_hh_size "Avg. Household Size (2008-2012)"
label var std_median_hh_inc "Median Household Income (2008-2012)"
label var std_median_housing_age "Median Housing Age (2008-2012)"
label var std_median_hh_value "Median Housing Value (2008-2012)"
label var std_median_rent "Median Rent (2008-2012)"
label var std_avg_commute_time "Avg. Commute Time (2008-2012)"
label var std_college_share "Some College Share (2008-2012)"
label var std_LFPR "Laborforce Participation Rate (2008-2012)"
label var std_unemployment_rate "Unemployment Rate (2008-2012)"
label var std_child_poverty_rate "Child Poverty Rate (2008-2012)"
label var std_adult_poverty_rate "Adult Poverty Rate (2008-2012)"

// Complaints per 1,000 residents
gen complaints_pc = complaint/tot_pop
replace complaints_pc = 1000*complaints_pc

// Predictors shown in the figure (one bivariate regression each)
local predictors count_RN border_100km pop_density median_hh_inc median_housing_age hisp_share white_share black_share other_share college_share LFPR child_poverty_rate adult_poverty_rate

// Bivariate regressions in levels. Each result is stored under the predictor's
// name, so the log-outcome loop below overwrites every one of them; these runs
// only show up in the log.
foreach v of local predictors {
	regress complaints_pc std_`v', vce(robust)
	estimates store `v'
}

// Inverse hyperbolic sine of the outcome (x100); not used by the regressions below
gen complaint_pc_IHS = log(complaints_pc + sqrt(1+complaints_pc^2))
replace complaint_pc_IHS = 100*complaint_pc_IHS

// Log outcome: observations with complaints_pc == 0 get a missing log and
// therefore drop out of the regressions
gen logcomplaint_pc = log(complaints_pc)

// Bivariate regressions with the log outcome; these are the plotted estimates
foreach v of local predictors {
	regress logcomplaint_pc std_`v', vce(robust)
	estimates store `v'
}

graph set window fontface "Times New Roman"

// Coefficient plot; transform() converts log points to percent changes.
// Option order is kept exactly as written because xlabel(), legend() and
// graphregion() are each given more than once.
coefplot `predictors', ///
	transform(* = 100*(exp(@)-1)) xline(0) pstyle(p1) ciopts(recast(rcap)) ///
	graphregion(fcolor(white)) drop(_cons) offset(0) ///
	mcolor(navy) msize(medsmall) mfcolor(navy) ///
	ylabel(,labsize(large)) xlabel(-70(10)30, nogrid labsize(large)) ///
	xtitle("Percent Change in the Number of Water" "Complaints per 1,000 people", size(large)) ///
	legend(off) ytitle("Standard Deviation Increase in...", size(large)) ///
	graphregion(fcolor(255 255 255)) graphregion(lcolor(255 255 255)) legend(off) ///
	xlabel(-100(20)100, angle(45) labsize(large)) name(rankchange_bv, replace) xsize(8.6)
graph export "$figures/Figure_D2_Panel_B.pdf", replace


// Panel C: Socio-Demographic and Economic Predictors of Citizen Complaints (2008-2012) for Waste
use "$processed_data/unique_complaints.dta", clear

// Complaint years 2003-2019 only (rows with a missing year are dropped too)
keep if inrange(year, 2003, 2019)

rename County_CIN County
rename ZipCode_CIN ZipCode
gen complaint = Number_Of_Complaints

// Waste incidents only: the flag must be non-zero and not system-missing
keep if incident_waste != 0 & incident_waste != .

// County border indicators; complaint rows without a county match are kept
merge m:1 County using "$processed_data/Border_Counties.dta", keep(master match) nogenerate

// Counties absent from the border file count as non-border
foreach b in border_exact border_100km {
	replace `b' = 0 if `b' == .
}

// Total complaints per ZIP-year, then the average over the years in which the ZIP appears
collapse (sum) complaint (mean) border_exact border_100km , by(ZipCode year)
collapse (mean) complaint border_exact border_100km, by(ZipCode)

// Turn the averaged border shares back into 0/1 indicators (cut-off 0.5)
foreach b in border_100km border_exact {
	replace `b' = (`b' >= 0.5)
}

// ZIP demographics: keep matches and census-only ZIPs, drop ZIPs not in the census file
merge 1:1 ZipCode using "$processed_data/zipcode_demographics_2012.dta", keep(match using) nogenerate

// Remaining ZIP-level inputs: never add ZIPs that exist only in the using file
foreach f in Emissions_events_count facility_count mean_PM25 {
	merge 1:1 ZipCode using "$processed_data/`f'.dta", keep(master match) nogenerate
}

// No record in a count file means a count of zero
foreach v in EmissionsEvent complaint count_RN {
	replace `v' = 0 if `v' == .
}

// Zeros in these census fields are treated as missing.
// median_hh_inc is a variable abbreviation of median_hh_income; the short
// form is kept because it names std_median_hh_inc and the stored estimates.
foreach v in tot_pop pop_density avg_hh_size median_hh_inc median_year_built median_hh_value median_rent {
	replace `v' = . if `v' == 0
}

// Complete cases only.
// NOTE(review): border_100km is zero-filled before the demographics merge, so
// census-only ZIPs still have it missing here and appear to be dropped even
// though their complaint count was just set to 0 -- confirm this is intended.
local required mean_PM25 EmissionsEvent border_100km count_RN tot_pop pop_density avg_hh_size median_hh_inc median_year_built median_hh_value median_rent avg_commute_time hisp_share white_share black_share other_share college_share LFPR unemployment_rate child_poverty_rate adult_poverty_rate
foreach v of local required {
	drop if `v' == .
}

// Housing age relative to 2012, the same reference year Panel A uses and the
// vintage named in the labels and the input file (this block previously used
// 2000). The variable is only used after standardization, so the shift in
// reference year leaves std_median_housing_age and the plot unchanged.
gen median_housing_age = 2012 - median_year_built

// Standardize the same list, with housing age taking the place of year built
local to_std : subinstr local required "median_year_built" "median_housing_age"
foreach v of local to_std {
	egen std_`v' = std(`v')
}

// Variable labels for the standardized regressors
label var std_count_RN "Number of Plants"
label var std_EmissionsEvent "Average # of Emissions Events (2003-2016)"
label var std_mean_PM25 "Average PM2.5 Concentration (2000-2018)"
label var std_border_100km "Border County"
label var std_tot_pop "Total Population (2008-2012)"
label var std_white_share "White Share (2008-2012)"
label var std_black_share "Black Share (2008-2012)"
label var std_other_share "Other Share (2008-2012)"
label var std_hisp_share "Hispanic Share (2008-2012)"
label var std_pop_density "Population Density (2008-2012)"
label var std_avg_hh_size "Avg. Household Size (2008-2012)"
label var std_median_hh_inc "Median Household Income (2008-2012)"
label var std_median_housing_age "Median Housing Age (2008-2012)"
label var std_median_hh_value "Median Housing Value (2008-2012)"
label var std_median_rent "Median Rent (2008-2012)"
label var std_avg_commute_time "Avg. Commute Time (2008-2012)"
label var std_college_share "Some College Share (2008-2012)"
label var std_LFPR "Laborforce Participation Rate (2008-2012)"
label var std_unemployment_rate "Unemployment Rate (2008-2012)"
label var std_child_poverty_rate "Child Poverty Rate (2008-2012)"
label var std_adult_poverty_rate "Adult Poverty Rate (2008-2012)"

// Complaints per 1,000 residents
gen complaints_pc = complaint/tot_pop
replace complaints_pc = 1000*complaints_pc

// Predictors shown in the figure (one bivariate regression each)
local predictors count_RN border_100km pop_density median_hh_inc median_housing_age hisp_share white_share black_share other_share college_share LFPR child_poverty_rate adult_poverty_rate

// Bivariate regressions in levels. Each result is stored under the predictor's
// name, so the log-outcome loop below overwrites every one of them; these runs
// only show up in the log.
foreach v of local predictors {
	regress complaints_pc std_`v', vce(robust)
	estimates store `v'
}

// Inverse hyperbolic sine of the outcome (x100); not used by the regressions below
gen complaint_pc_IHS = log(complaints_pc + sqrt(1+complaints_pc^2))
replace complaint_pc_IHS = 100*complaint_pc_IHS

// Log outcome: observations with complaints_pc == 0 get a missing log and
// therefore drop out of the regressions
gen logcomplaint_pc = log(complaints_pc)

// Bivariate regressions with the log outcome; these are the plotted estimates
foreach v of local predictors {
	regress logcomplaint_pc std_`v', vce(robust)
	estimates store `v'
}

graph set window fontface "Times New Roman"

// Coefficient plot; transform() converts log points to percent changes.
// Option order is kept exactly as written because xlabel(), legend() and
// graphregion() are each given more than once.
coefplot `predictors', ///
	transform(* = 100*(exp(@)-1)) xline(0) pstyle(p1) ciopts(recast(rcap)) ///
	graphregion(fcolor(white)) drop(_cons) offset(0) ///
	mcolor(navy) msize(medsmall) mfcolor(navy) ///
	ylabel(,labsize(large)) xlabel(-20(10)20, nogrid labsize(large)) ///
	xtitle("Percent Change in the Number of Waste" "Complaints per 1,000 people", size(large)) ///
	legend(off) ytitle("Standard Deviation Increase in...", size(large)) ///
	graphregion(fcolor(255 255 255)) graphregion(lcolor(255 255 255)) legend(off) ///
	xlabel(-100(20)100, labsize(large) angle(45)) name(rankchange_bv, replace) xsize(8.6)
graph export "$figures/Figure_D2_Panel_C.pdf", replace
